import requests
from bs4 import BeautifulSoup
import xlsxwriter
import time

def CleanData(InfoString):
    """Normalise the text of one scraped table cell.

    Every newline, carriage return and space character is removed. After
    that the first '年' and the first '月' are each rewritten as '-' and the
    first '日' is deleted, so a date such as '2020年01月01日' becomes
    '2020-01-01'. Only the first occurrence of each marker is touched.

    Args:
        InfoString: Raw cell text.

    Returns:
        The cleaned string.
    """
    # One pass that deletes the three whitespace characters we care about.
    compact = InfoString.translate({ord(ch): None for ch in '\n\r '})
    # Apply the date-marker substitutions in order, first occurrence only.
    for marker, substitute in (('年', '-'), ('月', '-'), ('日', '')):
        compact = compact.replace(marker, substitute, 1)
    return compact

def _FetchMonthRows(url, headers):
    """Download one monthly history page and return its cleaned table rows.

    Args:
        url: Address of the monthly page to request.
        headers: HTTP headers sent with the request.

    Returns:
        A list of rows, each a list of cleaned cell strings, or None when
        the server answers with a status code other than 200. The first
        <tr> of the page is treated as the table header and skipped, and
        <tr> elements that contain no <td> cells are ignored.

    Raises:
        Whatever requests / BeautifulSoup raise on network or parser
        failure; the caller decides how to handle it.
    """
    response = requests.get(url, headers=headers, timeout=10)
    # Force gb2312 decoding (per the original author, that is the encoding
    # the site serves its pages in).
    response.encoding = 'gb2312'
    if response.status_code != 200:
        return None

    soup = BeautifulSoup(response.text, 'lxml')
    records = []
    # [1:] drops the first <tr>, which holds the column titles.
    for tr in soup.find_all('tr')[1:]:
        cells = [CleanData(td.get_text()) for td in tr.find_all('td')]
        if cells:  # a row without <td> would only produce a blank line
            records.append(cells)
    return records


def ExtractBJWeather():
    """Scrape Beijing's monthly weather history into an Excel workbook.

    For every year from 2020 through 2025 a worksheet named after the year
    is created in '北京天气_2020到2025.xlsx'. Each of the twelve monthly pages
    is requested in turn and its table rows are appended below a header
    row. A month whose request fails (non-200 status or any exception) is
    reported on stdout and skipped; the remaining months are still fetched.

    The workbook is managed by a ``with`` block so that it is closed, and
    therefore written to disk, even if the run is interrupted part-way.
    """
    head = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36',
        'Connection': 'close'
    }

    with xlsxwriter.Workbook('北京天气_2020到2025.xlsx') as BJWeatherExcel:
        for year in range(2020, 2026):
            worksheet = BJWeatherExcel.add_worksheet(str(year))
            for col, title in enumerate(('日期', '天气状况', '气温', '风力风向')):
                worksheet.write(0, col, title)
            row = 1  # next free spreadsheet row; row 0 holds the titles

            for month in range(1, 13):
                url = f"http://www.tianqihoubao.com/lishi/beijing/month/{year}{month:02d}.html"
                print(f"正在请求：{url}")
                try:
                    # Fetch and parse completely before writing, so a
                    # failure never leaves a half-written month behind.
                    records = _FetchMonthRows(url, head)
                except Exception as e:
                    # Deliberately broad: the run is best-effort, and one
                    # bad month must not abort the remaining downloads.
                    print(f"请求出错，跳过该月：{url}，错误信息：{e}")
                    time.sleep(5)  # back off longer after a failure
                    continue

                if records is None:
                    print(f"请求失败：{url}")
                else:
                    for cells in records:
                        worksheet.write_row(row, 0, cells)
                        row += 1

                # Throttle every request, successful or not, to avoid
                # getting blocked by the site.
                time.sleep(1)

            print(f"...已完成 {year} 年的数据")

    print('-----------------全部完成-----------------')

# Run the scraper only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    ExtractBJWeather()
